In [2]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
In [3]:
df = pd.read_csv("AB_NYC_2019.csv")
In [3]:
df.head(2)
Out[3]:
id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2539 | Clean & quiet apt home by the park | 2787 | John | Brooklyn | Kensington | 40.64749 | -73.97237 | Private room | 149 | 1 | 9 | 19-10-2018 | 0.21 | 6 | 365 |
1 | 2595 | Skylit Midtown Castle | 2845 | Jennifer | Manhattan | Midtown | 40.75362 | -73.98377 | Entire home/apt | 225 | 1 | 45 | 21-05-2019 | 0.38 | 2 | 355 |
Histogram¶
In [4]:
sns.histplot(df["neighbourhood"].head(100))
Out[4]:
<Axes: xlabel='neighbourhood', ylabel='Count'>
In [9]:
sns.histplot(df[df["price"]<1000] , x= "price")
Out[9]:
<Axes: xlabel='price', ylabel='Count'>
In [8]:
sns.histplot(df[df["number_of_reviews"]<100] ,x = "number_of_reviews")
Out[8]:
<Axes: xlabel='number_of_reviews', ylabel='Count'>
Distplot¶
In [12]:
sns.distplot(df["availability_365"])
C:\Users\Satyam\AppData\Local\Temp\ipykernel_23924\1603039620.py:1: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 sns.distplot(df["availability_365"])
Out[12]:
<Axes: xlabel='availability_365', ylabel='Density'>
In [8]:
df["price"].value_counts(bins = 20)
Out[8]:
(-10.001, 500.0] 47862 (500.0, 1000.0] 805 (1000.0, 1500.0] 100 (1500.0, 2000.0] 53 (2500.0, 3000.0] 21 (2000.0, 2500.0] 20 (3500.0, 4000.0] 11 (4500.0, 5000.0] 6 (9500.0, 10000.0] 6 (4000.0, 4500.0] 6 (6000.0, 6500.0] 4 (3000.0, 3500.0] 2 (5500.0, 6000.0] 2 (7000.0, 7500.0] 2 (7500.0, 8000.0] 2 (5000.0, 5500.0] 2 (6500.0, 7000.0] 1 (8000.0, 8500.0] 1 (8500.0, 9000.0] 0 (9000.0, 9500.0] 0 Name: count, dtype: int64
In [20]:
# sns.boxplot(df[df["price"]<500]["price"])
sns.boxplot(df[df["price"]<500]["price"])
Out[20]:
<Axes: ylabel='price'>
In [19]:
sns.boxplot(df[df["reviews_per_month"]<10], y="reviews_per_month")
Out[19]:
<Axes: ylabel='reviews_per_month'>
In [23]:
df2 = sns.load_dataset("titanic")
In [36]:
sns.histplot(data=df2 , x="age")
Out[36]:
<Axes: xlabel='age', ylabel='Count'>
In [28]:
df2[df2["fare"]==0]
Out[28]:
survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
179 | 0 | 3 | male | 36.0 | 0 | 0 | 0.0 | S | Third | man | True | NaN | Southampton | no | True |
263 | 0 | 1 | male | 40.0 | 0 | 0 | 0.0 | S | First | man | True | B | Southampton | no | True |
271 | 1 | 3 | male | 25.0 | 0 | 0 | 0.0 | S | Third | man | True | NaN | Southampton | yes | True |
277 | 0 | 2 | male | NaN | 0 | 0 | 0.0 | S | Second | man | True | NaN | Southampton | no | True |
302 | 0 | 3 | male | 19.0 | 0 | 0 | 0.0 | S | Third | man | True | NaN | Southampton | no | True |
413 | 0 | 2 | male | NaN | 0 | 0 | 0.0 | S | Second | man | True | NaN | Southampton | no | True |
466 | 0 | 2 | male | NaN | 0 | 0 | 0.0 | S | Second | man | True | NaN | Southampton | no | True |
481 | 0 | 2 | male | NaN | 0 | 0 | 0.0 | S | Second | man | True | NaN | Southampton | no | True |
597 | 0 | 3 | male | 49.0 | 0 | 0 | 0.0 | S | Third | man | True | NaN | Southampton | no | True |
633 | 0 | 1 | male | NaN | 0 | 0 | 0.0 | S | First | man | True | NaN | Southampton | no | True |
674 | 0 | 2 | male | NaN | 0 | 0 | 0.0 | S | Second | man | True | NaN | Southampton | no | True |
732 | 0 | 2 | male | NaN | 0 | 0 | 0.0 | S | Second | man | True | NaN | Southampton | no | True |
806 | 0 | 1 | male | 39.0 | 0 | 0 | 0.0 | S | First | man | True | A | Southampton | no | True |
815 | 0 | 1 | male | NaN | 0 | 0 | 0.0 | S | First | man | True | B | Southampton | no | True |
822 | 0 | 1 | male | 38.0 | 0 | 0 | 0.0 | S | First | man | True | NaN | Southampton | no | True |
In [29]:
sns.distplot(df2["age"])
C:\Users\Satyam\AppData\Local\Temp\ipykernel_23924\49255041.py:1: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 sns.distplot(df2["age"])
Out[29]:
<Axes: xlabel='age', ylabel='Density'>
In [15]:
sns.distplot(df2["fare"])
C:\Users\Satyam\AppData\Local\Temp\ipykernel_28088\3524834500.py:1: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 sns.distplot(df2["fare"])
Out[15]:
<Axes: xlabel='fare', ylabel='Density'>
In [16]:
sns.boxplot(df2["age"])
Out[16]:
<Axes: ylabel='age'>
In [34]:
sns.violinplot(df2["sex"])
Out[34]:
<Axes: ylabel='sex'>
In [35]:
df2
Out[35]:
survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
2 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S | Third | woman | False | NaN | Southampton | yes | True |
3 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S | First | woman | False | C | Southampton | yes | False |
4 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S | Third | man | True | NaN | Southampton | no | True |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
886 | 0 | 2 | male | 27.0 | 0 | 0 | 13.0000 | S | Second | man | True | NaN | Southampton | no | True |
887 | 1 | 1 | female | 19.0 | 0 | 0 | 30.0000 | S | First | woman | False | B | Southampton | yes | True |
888 | 0 | 3 | female | NaN | 1 | 2 | 23.4500 | S | Third | woman | False | NaN | Southampton | no | False |
889 | 1 | 1 | male | 26.0 | 0 | 0 | 30.0000 | C | First | man | True | C | Cherbourg | yes | True |
890 | 0 | 3 | male | 32.0 | 0 | 0 | 7.7500 | Q | Third | man | True | NaN | Queenstown | no | True |
891 rows × 15 columns
In [ ]: